Exemple #1
0
    def _schedule_accepted_tasks(self):
        """Launches the accepted tasks on Mesos using the offers popped from the offer manager

        For every popped set of node offers, each accepted task has ``launch()`` called with the current time and is
        converted into a Mesos task; accepted running job executions are asked for their next task; accepted queued
        job executions are scheduled and then asked for their first task. If an ``OperationalError`` is raised while
        handling the queued job executions, it is logged (not re-raised) and launching continues with whatever Mesos
        tasks were gathered up to that point.

        :returns: The number of Mesos tasks that were scheduled
        :rtype: int
        """

        launch_time = now()
        accepted_offers = offer_mgr.pop_offers_with_accepted_job_exes()
        mesos_tasks_by_node = {}  # {Node ID: [Mesos Tasks]}
        new_job_exes = []

        for offers in accepted_offers:
            # Register the node's (initially empty) Mesos task list before filling it
            node_tasks = mesos_tasks_by_node[offers.node.id] = []

            # Tasks that were accepted directly on this node's offers
            for accepted_task in offers.get_accepted_tasks():
                accepted_task.launch(launch_time)
                node_tasks.append(create_mesos_task(accepted_task))

            # Accepted job executions that are already running move on to their next task (if there is one)
            for job_exe in offers.get_accepted_running_job_exes():
                next_task = job_exe.start_next_task()
                if not next_task:
                    continue
                next_task.launch(launch_time)
                node_tasks.append(create_mesos_task(next_task))

            # Accepted queued job executions are collected here and scheduled together below
            new_job_exes.extend(offers.get_accepted_new_job_exes())

        try:
            # Schedule the collected queued job executions, then start the first task of each
            scheduled = self._schedule_queued_job_executions(new_job_exes, workspace_mgr.get_workspaces())
            running_job_mgr.add_job_exes(scheduled)
            for job_exe in scheduled:
                first_task = job_exe.start_next_task()
                if not first_task:
                    continue
                first_task.launch(launch_time)
                mesos_tasks_by_node[job_exe.node_id].append(create_mesos_task(first_task))
        except OperationalError:
            logger.exception('Failed to schedule queued job executions')

        # Hand the Mesos tasks to the driver; launchTasks() is called for every popped entry, even when its task list
        # is empty
        task_total = 0
        node_total = 0
        for offers in accepted_offers:
            node_tasks = mesos_tasks_by_node[offers.node.id]
            if node_tasks:
                task_total += len(node_tasks)
                node_total += 1

            # Wrap each raw offer ID in a Mesos OfferID message
            offer_id_msgs = []
            for raw_offer_id in offers.offer_ids:
                offer_id_msg = mesos_pb2.OfferID()
                offer_id_msg.value = raw_offer_id
                offer_id_msgs.append(offer_id_msg)

            self._driver.launchTasks(offer_id_msgs, node_tasks)

        if task_total:
            logger.info('Launched %i Mesos task(s) on %i node(s)', task_total, node_total)
        return task_total
Exemple #2
0
    def perform_scheduling(self, client, when):
        """Runs a single scheduling pass: refreshes cluster resources, schedules waiting tasks, system tasks and new
        job executions, then allocates/declines offers and launches tasks

        Returns 0 without scheduling anything when there is no framework ID, no client, or the client has no driver.
        Also returns 0 (skipping offer allocation and task launch) when the scheduler manager's framework ID differs
        from the one captured at the start of the pass.

        :param client: The Mesos scheduler client
        :type client: :class:`mesoshttp.client.MesosClient`
        :param when: The current time
        :type when: :class:`datetime.datetime`
        :returns: The number of tasks that were scheduled
        :rtype: int
        """
        # Capture the framework ID up front so that a change during this pass can be detected before launching
        starting_framework_id = scheduler_mgr.framework_id
        if not (starting_framework_id and client and client.get_driver()):
            # Nothing is scheduled until the scheduler has connected to Mesos
            logger.warning('Scheduler not connected to Mesos. Scheduling delayed until connection established.')
            return 0

        resource_mgr.update_all_cluster_resources()

        # Gather the current state from the managers
        all_job_types = job_type_mgr.get_job_types()
        type_resources = job_type_mgr.get_job_type_resources()
        current_tasks = task_mgr.get_all_tasks()
        active_job_exes = job_exe_mgr.get_running_job_exes()
        all_workspaces = workspace_mgr.get_workspaces()

        nodes = self._prepare_nodes(current_tasks, active_job_exes, when)
        ready_nodes = self._schedule_waiting_tasks(nodes, active_job_exes, when)
        system_tasks_done = self._schedule_system_tasks(ready_nodes, type_resources, when)

        if not system_tasks_done:
            # New job executions are held back while system tasks are still waiting
            logger.warning('No new jobs scheduled due to waiting system tasks')
            scheduler_mgr.warning_active(WAITING_SYSTEM_TASKS)
            new_job_exe_count = 0
        else:
            type_limits = self._calculate_job_type_limits(all_job_types, active_job_exes)
            new_job_exe_count = self._schedule_new_job_exes(starting_framework_id, ready_nodes, all_job_types,
                                                            type_limits, type_resources, all_workspaces)

        if starting_framework_id != scheduler_mgr.framework_id:
            logger.warning('Scheduler framework ID changed, skipping task launch')
            return 0

        self._allocate_offers(nodes)
        offers_to_decline = resource_mgr.decline_offers()
        self._decline_offers(offers_to_decline)

        num_tasks, num_offers = self._launch_tasks(client, nodes)
        scheduler_mgr.add_scheduling_counts(new_job_exe_count, num_tasks, num_offers)
        return num_tasks
Exemple #3
0
    def perform_scheduling(self, driver, when):
        """Runs a single scheduling pass: schedules waiting tasks, system tasks and new job executions, then allocates
        offers and launches tasks

        Returns 0 (skipping offer allocation and task launch) when the scheduler manager's framework ID differs from
        the one captured at the start of the pass.

        :param driver: The Mesos scheduler driver
        :type driver: :class:`mesos_api.mesos.SchedulerDriver`
        :param when: The current time
        :type when: :class:`datetime.datetime`
        :returns: The number of tasks that were scheduled
        :rtype: int
        """

        # Capture the framework ID up front so that a change during this pass can be detected before launching
        starting_framework_id = scheduler_mgr.framework_id

        # Gather the current state from the managers
        all_job_types = job_type_mgr.get_job_types()
        type_resources = job_type_mgr.get_job_type_resources()
        current_tasks = task_mgr.get_all_tasks()
        active_job_exes = job_exe_mgr.get_running_job_exes()
        all_workspaces = workspace_mgr.get_workspaces()

        nodes = self._prepare_nodes(current_tasks, active_job_exes, when)
        ready_nodes = self._schedule_waiting_tasks(nodes, active_job_exes, when)
        system_tasks_done = self._schedule_system_tasks(ready_nodes, type_resources, when)

        if not system_tasks_done:
            # New job executions are held back while system tasks are still waiting
            # TODO: this is a good place for a scheduler warning in the status JSON
            logger.warning('No new jobs scheduled due to waiting system tasks')
            new_job_exe_count = 0
        else:
            type_limits = self._calculate_job_type_limits(all_job_types, active_job_exes)
            new_job_exe_count = self._schedule_new_job_exes(starting_framework_id, ready_nodes, all_job_types,
                                                            type_limits, type_resources, all_workspaces)

        if starting_framework_id != scheduler_mgr.framework_id:
            logger.warning('Scheduler framework ID changed, skipping task launch')
            return 0

        self._allocate_offers(nodes)
        num_tasks, num_offers = self._launch_tasks(driver, nodes)
        scheduler_mgr.add_scheduling_counts(new_job_exe_count, num_tasks, num_offers)
        return num_tasks
Exemple #4
0
    def _schedule_accepted_tasks(self):
        """Launches the accepted tasks on Mesos using the offers popped from the offer manager

        For every popped set of node offers, accepted tasks are converted into Mesos tasks, accepted running job
        executions are asked for their next task, and accepted queued job executions are scheduled and then asked for
        their first task. All gathered tasks are passed to the task manager's ``launch_tasks()`` in a single call
        before anything is handed to the driver. If an ``OperationalError`` is raised while handling the queued job
        executions, it is logged (not re-raised) and launching continues with whatever was gathered up to that point.

        :returns: The number of Mesos tasks that were scheduled
        :rtype: int
        """

        def _as_mesos_offer_ids(raw_offer_ids):
            """Wraps each raw offer ID in a Mesos OfferID message"""
            wrapped = []
            for raw_offer_id in raw_offer_ids:
                offer_id_msg = mesos_pb2.OfferID()
                offer_id_msg.value = raw_offer_id
                wrapped.append(offer_id_msg)
            return wrapped

        launch_time = now()
        launched = []  # every task gathered during this call, reported to the task manager in one batch
        mesos_tasks_by_node = {}  # {Node ID: [Mesos Tasks]}
        new_job_exes = []
        accepted_offers = offer_mgr.pop_offers_with_accepted_job_exes()

        for offers in accepted_offers:
            # Register the node's (initially empty) Mesos task list before filling it
            node_tasks = mesos_tasks_by_node[offers.node.id] = []

            # Tasks that were accepted directly on this node's offers
            for accepted_task in offers.get_accepted_tasks():
                launched.append(accepted_task)
                node_tasks.append(create_mesos_task(accepted_task))

            # Accepted job executions that are already running move on to their next task (if there is one)
            for job_exe in offers.get_accepted_running_job_exes():
                next_task = job_exe.start_next_task()
                if not next_task:
                    continue
                launched.append(next_task)
                node_tasks.append(create_mesos_task(next_task))

            # Accepted queued job executions are collected here and scheduled together below
            new_job_exes.extend(offers.get_accepted_new_job_exes())

        try:
            # Schedule the collected queued job executions, then start the first task of each
            scheduled = self._schedule_queued_job_executions(new_job_exes, workspace_mgr.get_workspaces())
            job_exe_mgr.schedule_job_exes(scheduled)
            for job_exe in scheduled:
                first_task = job_exe.start_next_task()
                if not first_task:
                    continue
                launched.append(first_task)
                mesos_tasks_by_node[job_exe.node_id].append(create_mesos_task(first_task))
        except OperationalError:
            logger.exception('Failed to schedule queued job executions')

        # Report the gathered tasks to the task manager, then hand the Mesos tasks to the driver; launchTasks() is
        # called for every popped entry, even when its task list is empty
        task_mgr.launch_tasks(launched, launch_time)
        task_total = 0
        node_total = 0
        for offers in accepted_offers:
            node_tasks = mesos_tasks_by_node[offers.node.id]
            if node_tasks:
                task_total += len(node_tasks)
                node_total += 1
            self._driver.launchTasks(_as_mesos_offer_ids(offers.offer_ids), node_tasks)

        if task_total:
            logger.info('Launched %i Mesos task(s) on %i node(s)', task_total, node_total)
        return task_total