def test_scheduler_event_handlers(mocker):
    sched = mocker.Mock()
    driver = mocker.Mock()
    proxy = SchedulerProxy(sched)

    proxy.executorLost(driver, mesos_pb2.ExecutorInfo(),
                       mesos_pb2.ExecutorID(), 1)
    proxy.frameworkMessage(driver, mesos_pb2.ExecutorID(),
                           mesos_pb2.SlaveID(), 'message')
    proxy.offerRescinded(driver, mesos_pb2.OfferID())
    proxy.registered(driver, mesos_pb2.FrameworkID(), mesos_pb2.MasterInfo())
    proxy.resourceOffers(driver, [mesos_pb2.Offer(), mesos_pb2.Offer()])
    proxy.slaveLost(driver, mesos_pb2.SlaveID())
    proxy.statusUpdate(driver, mesos_pb2.TaskStatus())
    proxy.reregistered(driver, mesos_pb2.MasterInfo())
    proxy.error(driver, 'message')
    proxy.disconnected(driver)

    sched.on_executor_lost.assert_called_once()
    sched.on_message.assert_called_once()
    sched.on_rescinded.assert_called_once()
    sched.on_registered.assert_called_once()
    sched.on_offers.assert_called_once()
    sched.on_slave_lost.assert_called_once()
    sched.on_update.assert_called_once()
    sched.on_reregistered.assert_called_once()
    sched.on_error.assert_called_once()
    sched.on_disconnected.assert_called_once()
Beispiel #2
0
    def _schedule_accepted_tasks(self):
        """Schedules all of the tasks that have been accepted

        :returns: The number of Mesos tasks that were scheduled
        :rtype: int
        """

        tasks_to_launch = {}  # {Node ID: [Mesos Tasks]}
        queued_job_exes_to_schedule = []
        node_offers_list = self._offer_manager.pop_offers_with_accepted_job_exes(
        )
        for node_offers in node_offers_list:
            mesos_tasks = []
            tasks_to_launch[node_offers.node.id] = mesos_tasks
            # Start next task for already running job executions that were accepted
            for running_job_exe in node_offers.get_accepted_running_job_exes():
                task = running_job_exe.start_next_task()
                if task:
                    mesos_tasks.append(create_mesos_task(task))
            # Gather up queued job executions that were accepted
            for queued_job_exe in node_offers.get_accepted_new_job_exes():
                queued_job_exes_to_schedule.append(queued_job_exe)

        try:
            # Schedule queued job executions and start their first tasks
            workspaces = self._workspace_manager.get_workspaces()
            scheduled_job_exes = self._schedule_queued_job_executions(
                queued_job_exes_to_schedule, workspaces)
            self._job_exe_manager.add_job_exes(scheduled_job_exes)
            for scheduled_job_exe in scheduled_job_exes:
                task = scheduled_job_exe.start_next_task()
                if task:
                    tasks_to_launch[scheduled_job_exe.node_id].append(
                        create_mesos_task(task))
        except OperationalError:
            logger.exception('Failed to schedule queued job executions')

        # Launch tasks on Mesos
        total_num_tasks = 0
        total_num_nodes = 0
        for node_offers in node_offers_list:
            task_list = tasks_to_launch[node_offers.node.id]
            num_tasks = len(task_list)
            total_num_tasks += num_tasks
            if num_tasks:
                total_num_nodes += 1
            mesos_offer_ids = []
            for offer_id in node_offers.offer_ids:
                mesos_offer_id = mesos_pb2.OfferID()
                mesos_offer_id.value = offer_id
                mesos_offer_ids.append(mesos_offer_id)
            self._driver.launchTasks(mesos_offer_ids, task_list)
        if total_num_tasks:
            logger.info('Launched %i Mesos task(s) on %i node(s)',
                        total_num_tasks, total_num_nodes)
        return total_num_tasks
 def setUp(self):
     self.framework_id = mesos_pb2.FrameworkID(value=self.FRAMEWORK_ID)
     self.framework_info = mesos_pb2.FrameworkInfo(
         user='******',
         name='fake_framework_name',
     )
     self.command_info = mesos_pb2.CommandInfo(value='fake-command')
     self.executor_id = mesos_pb2.ExecutorID(value='fake-executor-id')
     self.executor_info = mesos_pb2.ExecutorInfo(
         executor_id=self.executor_id,
         framework_id=self.framework_id,
         command=self.command_info,
     )
     self.slave_id = mesos_pb2.SlaveID(value='fake-slave-id')
     self.offer_id = mesos_pb2.OfferID(value='1')
Beispiel #4
0
    def run(self):
        """The main run loop of the thread
        """

        logger.info('Scheduling thread started')

        while self._running:

            started = now()

            num_tasks = 0
            try:
                num_tasks = self._perform_scheduling()
            except Exception:
                logger.exception('Critical error in scheduling thread')

            duration = now() - started
            msg = 'Scheduling thread loop took %.3f seconds'
            if duration > SchedulingThread.SCHEDULE_LOOP_WARN_THRESHOLD:
                logger.warning(msg, duration.total_seconds())
            else:
                logger.debug(msg, duration.total_seconds())

            if num_tasks == 0:
                # Since we didn't schedule anything, give resources back to Mesos and pause a moment
                try:
                    for node_offers in offer_mgr.pop_all_offers():
                        for offer_id in node_offers.offer_ids:
                            mesos_offer_id = mesos_pb2.OfferID()
                            mesos_offer_id.value = offer_id
                            self._driver.declineOffer(mesos_offer_id)
                except Exception:
                    logger.exception('Critical error in scheduling thread')

                logger.debug('Scheduling thread is pausing for %i second(s)', SchedulingThread.DELAY)
                time.sleep(SchedulingThread.DELAY)

        logger.info('Scheduling thread stopped')
Beispiel #5
0
    def _launch_tasks(self, driver, nodes):
        """Launches all of the tasks that have been scheduled on the given nodes

        :param driver: The Mesos scheduler driver
        :type driver: :class:`mesos_api.mesos.SchedulerDriver`
        :param nodes: The dict of all scheduling nodes stored by node ID
        :type nodes: dict
        :returns: The number of tasks that were launched and the number of offers accepted
        :rtype: tuple
        """

        started = now()

        # Start and launch tasks in the task manager
        all_tasks = []
        for node in nodes.values():
            node.start_job_exe_tasks()
            all_tasks.extend(node.allocated_tasks)
        task_mgr.launch_tasks(all_tasks, started)

        # Launch tasks in Mesos
        node_count = 0
        total_node_count = 0
        total_offer_count = 0
        total_task_count = 0
        total_offer_resources = NodeResources()
        total_task_resources = NodeResources()
        for node in nodes.values():
            mesos_offer_ids = []
            mesos_tasks = []
            offers = node.allocated_offers
            for offer in offers:
                total_offer_count += 1
                total_offer_resources.add(offer.resources)
                mesos_offer_id = mesos_pb2.OfferID()
                mesos_offer_id.value = offer.id
                mesos_offer_ids.append(mesos_offer_id)
            tasks = node.allocated_tasks
            for task in tasks:
                total_task_resources.add(task.get_resources())
                mesos_tasks.append(create_mesos_task(task))
            task_count = len(tasks)
            total_task_count += task_count
            if task_count:
                node_count += 1
            if mesos_offer_ids:
                total_node_count += 1
                try:
                    driver.launchTasks(mesos_offer_ids, mesos_tasks)
                except Exception:
                    logger.exception(
                        'Error occurred while launching tasks on node %s',
                        node.hostname)

        duration = now() - started
        msg = 'Launching tasks took %.3f seconds'
        if duration > LAUNCH_TASK_WARN_THRESHOLD:
            logger.warning(msg, duration.total_seconds())
        else:
            logger.debug(msg, duration.total_seconds())

        declined_resources = NodeResources()
        declined_resources.add(total_offer_resources)
        declined_resources.subtract(total_task_resources)
        if total_offer_count:
            logger.info(
                'Accepted %d offer(s) from %d node(s), launched %d task(s) with %s on %d node(s), declined %s',
                total_offer_count, total_node_count, total_task_count,
                total_task_resources, node_count, declined_resources)
        return total_task_count, total_offer_count