Esempio n. 1
0
    def test_midstreamSchedule_taskAlreadyRunning(self):
        """
        A busy machine must not receive new work until it is free.

        The only machine is occupied by an already-running task that still
        needs 200 ticks, so the one remaining queued task is expected to be
        scheduled at tick 200.

        Note: the already-running task has no dependency relationship with
        the other task that gets scheduled.
        """
        onlyMachine = Machine(0)
        config = SystemConfiguration([onlyMachine])

        # Task that is still waiting to be placed.
        pendingTask = scheduling.Task(0, {onlyMachine: 300})
        pendingQuery = Query(
            0, [JobDag(JobNode(Job(0, [pendingTask], 1), []))])

        # Task that currently occupies the machine (200 ticks remaining).
        busyTask = scheduling.Task(1, {onlyMachine: 1000})
        busyQuery = Query(1, [JobDag(JobNode(Job(1, [busyTask], 1), []))])
        inProgress = [RunningTask(busyTask, onlyMachine, 200)]

        state = SystemState(config, 0, [pendingQuery, busyQuery], inProgress,
                            [])

        actual = OnlineGreedyScheduler().handleNewQuery(state)

        expected = Schedule([ScheduledTask(pendingTask, onlyMachine, 200)])
        self.assertTrue(
            actual.approxEquals(expected, EPSILON), "actual=%s" % actual)
Esempio n. 2
0
    def test_initialSchedule_2machines(self):
        """
        Two idle machines, two queries, each preferring a different machine.

        No tasks are running.  Query 0 has 1 job with 1 task, which takes
        200 ticks on machine 0 and 300 ticks on machine 1.  Query 1 has 1 job
        with 1 task, which takes 300 ticks on machine 0 and 200 ticks on
        machine 1.

        The scheduler should start both tasks at tick 0, on different
        machines.
        """
        machines = [Machine(0), Machine(1)]
        configuration = SystemConfiguration(machines)
        query0Task0 = scheduling.Task(0, {machines[0]: 200, machines[1]: 300})
        query0 = Query(0, [JobDag(JobNode(Job(0, [query0Task0], 1), []))])
        # Give the second task its own id (previously it reused id 0), so the
        # two tasks stay distinguishable, as in the sibling tests.
        query1Task0 = scheduling.Task(1, {machines[0]: 300, machines[1]: 200})
        query1 = Query(1, [JobDag(JobNode(Job(1, [query1Task0], 1), []))])
        queue = [query0, query1]
        systemState = SystemState(configuration, 0, queue, [], [])

        scheduler = OnlineGreedyScheduler()
        schedule = scheduler.handleNewQuery(systemState)

        expected = Schedule([
            ScheduledTask(query0Task0, machines[0], 0),
            ScheduledTask(query1Task0, machines[1], 0)
        ])
        self.assertTrue(
            schedule.approxEquals(expected, EPSILON), "actual=%s" % schedule)
Esempio n. 3
0
    def test_initialSchedule_simple(self):
        """
        One idle machine, two queries: the shorter one should go first.

        No tasks are running.  Query 0 has 1 job with 1 task that takes 1000
        ticks; query 1 has 1 job with 1 task that takes 200 ticks.  The
        expected schedule runs query 1's task at tick 0 and query 0's task
        at tick 200.
        """
        onlyMachine = Machine(0)
        config = SystemConfiguration([onlyMachine])

        longTask = scheduling.Task(0, {onlyMachine: 1000})
        longQuery = Query(0, [JobDag(JobNode(Job(0, [longTask], 1), []))])

        shortTask = scheduling.Task(1, {onlyMachine: 200})
        shortQuery = Query(1, [JobDag(JobNode(Job(1, [shortTask], 1), []))])

        state = SystemState(config, 0, [longQuery, shortQuery], [], [])

        actual = OnlineGreedyScheduler().handleNewQuery(state)

        expected = Schedule([
            ScheduledTask(shortTask, onlyMachine, 0),
            ScheduledTask(longTask, onlyMachine, 200),
        ])
        self.assertTrue(
            actual.approxEquals(expected, EPSILON), "actual=%s" % actual)
Esempio n. 4
0
    def test_initialSchedule_2machines_2tasks(self):
        """
        Two idle machines, two queries with two tasks each.

        No tasks are running.  Query 0 has 1 job with 2 tasks: one takes
        about 200 ticks on any machine, the other about 1200 ticks.  Query 1
        has 1 job with 2 tasks that both take about 1000 ticks on any
        machine.  All durations are perturbed slightly so that there is a
        unique best solution.

        The scheduler should run query 1's tasks first and query 0's tasks
        afterwards, since this minimizes the average finishing time.
        """
        firstMachine = Machine(0)
        secondMachine = Machine(1)
        config = SystemConfiguration([firstMachine, secondMachine])

        # Query 0: one short task and one long task.
        q0Short = scheduling.Task(0, {firstMachine: 200, secondMachine: 201})
        q0Long = scheduling.Task(1, {firstMachine: 1201, secondMachine: 1200})
        q0Job = Job(0, [q0Short, q0Long], 2)
        firstQuery = Query(0, [JobDag(JobNode(q0Job, []))])

        # Query 1: two tasks of (almost) equal length.
        q1First = scheduling.Task(2, {firstMachine: 1000, secondMachine: 1001})
        q1Second = scheduling.Task(3,
                                   {firstMachine: 1001, secondMachine: 1000})
        q1Job = Job(1, [q1First, q1Second], 2)
        secondQuery = Query(1, [JobDag(JobNode(q1Job, []))])

        state = SystemState(config, 0, [firstQuery, secondQuery], [], [])

        actual = OnlineGreedyScheduler().handleNewQuery(state)

        expected = Schedule([
            ScheduledTask(q1First, firstMachine, 0),
            ScheduledTask(q1Second, secondMachine, 0),
            ScheduledTask(q0Short, firstMachine, 1000),
            ScheduledTask(q0Long, secondMachine, 1000),
        ])
        self.assertTrue(
            actual.approxEquals(expected, EPSILON), "actual=%s" % actual)
Esempio n. 5
0
    def test_midstreamSchedule_taskAlreadyFinished(self):
        """
        A task that has already finished must be ignored by the scheduler.

        The single queued query owns two tasks, one of which is reported as
        completed.  The completed task should not run again and should not
        be treated as a dependency for the query, so the expected schedule
        contains only the other task, starting at tick 0.
        """
        onlyMachine = Machine(0)
        config = SystemConfiguration([onlyMachine])

        remainingTask = scheduling.Task(0, {onlyMachine: 300})
        finishedTask = scheduling.Task(1, {onlyMachine: 1000})
        job = Job(0, [remainingTask, finishedTask], 2)
        onlyQuery = Query(0, [JobDag(JobNode(job, []))])

        state = SystemState(config, 0, [onlyQuery], [], [finishedTask])

        actual = OnlineGreedyScheduler().handleNewQuery(state)

        expected = Schedule([ScheduledTask(remainingTask, onlyMachine, 0)])
        self.assertTrue(
            actual.approxEquals(expected, EPSILON), "actual=%s" % actual)
Esempio n. 6
0
 def handleTask(task):
     """
     Return a one-element list with the task to use in place of `task`.

     If the task's id has an entry in `preexistingAssignments` (a mapping
     that is not defined in this block; it comes from the surrounding
     scope), a new Task with the same id is returned whose time mapping
     contains only the assigned machine, mapped to the assignment's
     remaining time.  Otherwise the original task is returned unchanged.
     """
     taskId = task.getId()
     # `in` replaces dict.has_key(), which no longer exists in Python 3.
     if taskId not in preexistingAssignments:
         return [task]

     assignment = preexistingAssignments[taskId]
     # defaultdict(int): looking up any other machine in this mapping
     # yields 0 instead of raising KeyError.
     pinnedTimes = defaultdict(
         int, {assignment.getMachine(): assignment.getRemainingTime()})
     return [scheduling.Task(taskId, pinnedTimes)]
Esempio n. 7
0
    def test_midstreamSchedule_nonzeroTime(self):
        """
        The schedule must start at the current time, not at time 0.

        The system state reports a current time of 100, so the single queued
        task is expected to be scheduled at tick 100.  The rest of the setup
        is arbitrary.
        """
        onlyMachine = Machine(0)
        config = SystemConfiguration([onlyMachine])

        onlyTask = scheduling.Task(0, {onlyMachine: 1000})
        onlyQuery = Query(0, [JobDag(JobNode(Job(0, [onlyTask], 1), []))])

        state = SystemState(config, 100, [onlyQuery], [], [])

        actual = OnlineGreedyScheduler().handleNewQuery(state)

        expected = Schedule([ScheduledTask(onlyTask, onlyMachine, 100)])
        self.assertTrue(
            actual.approxEquals(expected, EPSILON), "actual=%s" % actual)
Esempio n. 8
0
 def addFakeRoot(originalJobDag):
     """
     Wrap `originalJobDag` in a new JobDag with a placeholder root.

     The new root node holds a Job with id None containing a single Task
     with id None and an empty defaultdict(int) as its time mapping; the
     original DAG's root becomes that node's only child.
     """
     placeholderTask = scheduling.Task(None, defaultdict(int))
     placeholderJob = Job(None, [placeholderTask], 1)
     children = [originalJobDag.getRoot()]
     return JobDag(JobNode(placeholderJob, children))